# Combined distance matrices and 3-D classical MDS ----
# Builds the "PDN" variant of the multi-value distance, merges it with the
# continuous-feature distance, and embeds the result in three dimensions.
# Every input used here (my_cosine, *_jaccardo, cosinedist(), ...) is defined
# outside this section.

#TEXT_J_all_distanceo_cap_scale_0_1 <- sqrt(mulit_value_replace_distane^2+continues_euclideano_cap_0_1^2)
#weight_steamspy_tags_order_cosinedist<- cosinedist(weight_steamspy_tags_order)
#weight_steamspy_tags_order_jaccardc<-(1-jaccard(as.matrix(weight_steamspy_tags_order),10)[-1])[1:(length(cosine_dist_mulit_value))]

# Cosine distance over the two feature sets bound side by side.
cosine_dist_mulit_value_PDN <- cosinedist(
  cbind(my_cosine, my_cosine_PDN)
)

# Root-sum-of-squares of the component distances; the tag-order term is
# weighted by 1.5 before squaring.
# NOTE(review): the assignment of weight_steamspy_tags_order_cosinedist is
# commented out above, so it has to come from elsewhere -- confirm.
mulit_value_replace_distane_PDN <- sqrt(
  cosine_dist_mulit_value_PDN^2 +
    owners_jaccardo^2 +
    eng_jaccardo^2 +
    required_jaccardo^2 +
    plat_jaccardo^2 +
    (1.5 * weight_steamspy_tags_order_cosinedist)^2
)
#mulit_value_replace_distane_PDN <- sqrt(cosine_dist_mulit_value_PDN^2+owners_jaccardo^2+eng_jaccardo^2+required_jaccardo^2+plat_jaccardo^2+(1*cosine_dist_mulit_value)^2+(1*weight_steamspy_tags_order_cosinedist)^2)

# Merge each multi-value distance with the continuous-feature distance.
TEXT_COSINE_all_distanceo_cap_scale_0_1 <- sqrt(
  mulit_value_replace_distane^2 + continues_euclideano_cap_scale_0_1^2
)
TEXT_COSINE_all_distanceo_cap_scale_0_1_PDN <- sqrt(
  mulit_value_replace_distane_PDN^2 + continues_euclideano_cap_scale_0_1^2
)

#parLapply(cl, all_distanceo_cap_scale_1_2, function(x,k=3) { cmdscale(x, k=3) })
# NOTE(review): this cluster is neither used nor stopped in the code visible
# here (parLapply and stopCluster are both commented out) -- confirm it is
# still needed, otherwise the worker processes are left running.
cpu.cores <- detectCores()
cl <- makeCluster(cpu.cores)

#TEXT_COSINE_all_distanceo_cap_scale_0_1
#TEXT_J_all_distanceo_cap_scale_0_1
# Classical MDS down to three coordinates, keeping the eigenvalues.
fitall343d <- cmdscale(
  TEXT_COSINE_all_distanceo_cap_scale_0_1_PDN,
  k = 3,
  eig = TRUE
)

# One vector per embedding axis.
x <- fitall343d[["points"]][, 1]
length(x)
## [1] 8727
y <- fitall343d[["points"]][, 2]
z <- fitall343d[["points"]][, 3]
#plot3d(x, y,z, xlab="Coordinate 1", ylab="Coordinate 2",
# main="fite3d",col = colors)
#text3d(x=x, y=y, z=z,texts = row.names(train_datasetG_fifth), cex=.5)
#plot3d(x=x, y=y, z=z,xlab="PC1", ylab="PC2", zlab="PC3", size=5)
#text3d(x=x,y=y,z=z,texts=names, col=4)
#stopCluster(cl)
## Read the 8727 x 8727 data matrix successfully!
## OpenMP is working. 1 threads.
## Using no_dims = 3, perplexity = 50.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 18.51 seconds (sparsity = 0.023570)!
## Learning embedding...
## Iteration 50: error is 89.806484 (50 iterations in 2.24 seconds)
## Iteration 100: error is 89.806268 (50 iterations in 2.71 seconds)
## Iteration 150: error is 89.153601 (50 iterations in 2.85 seconds)
## Iteration 200: error is 88.073210 (50 iterations in 2.11 seconds)
## Iteration 250: error is 87.936330 (50 iterations in 2.15 seconds)
## Iteration 300: error is 2.720462 (50 iterations in 1.86 seconds)
## Iteration 350: error is 2.382119 (50 iterations in 1.70 seconds)
## Iteration 400: error is 2.219792 (50 iterations in 1.71 seconds)
## Iteration 450: error is 2.120450 (50 iterations in 1.74 seconds)
## Iteration 500: error is 2.054506 (50 iterations in 1.69 seconds)
## Iteration 550: error is 2.006654 (50 iterations in 1.68 seconds)
## Iteration 600: error is 1.970315 (50 iterations in 1.67 seconds)
## Iteration 650: error is 1.941960 (50 iterations in 1.69 seconds)
## Iteration 700: error is 1.920616 (50 iterations in 1.67 seconds)
## Fitting performed in 27.47 seconds.
## [1] 8727
# Disabled experiment: the code below is wrapped in a string literal, so
# evaluating this expression only yields (and, at top level, prints) the text
# itself -- none of the quoted statements run.
# NOTE(review): the quoted loop initialises `mycex_posi` but assigns into
# `mycex2`, so `table(mycex_posi)` would tabulate an empty vector if this were
# re-enabled as written.
"mycex_posi <- c()
for (i in 1:length(PLOTER$categories1)) {
if (PLOTER$positive_ratings[i] == summary(PLOTER$positive_ratings)[1]){
mycex2[i] <- 30
}else if (PLOTER$positive_ratings[i] == summary((PLOTER$positive_ratings))[2]){
mycex2[i] <- 20
}else if (PLOTER$positive_ratings[i] == summary((PLOTER$positive_ratings))[3]){
mycex2[i] <- 10
}else {
mycex2[i] <- 20
}
}
summary(PLOTER$positive_ratings)
table(mycex_posi)"
## [1] "mycex_posi <- c()\nfor (i in 1:length(PLOTER$categories1)) {\n if (PLOTER$positive_ratings[i] == summary(PLOTER$positive_ratings)[1]){\n mycex2[i] <- 30\n }else if (PLOTER$positive_ratings[i] == summary((PLOTER$positive_ratings))[2]){\n mycex2[i] <- 20\n }else if (PLOTER$positive_ratings[i] == summary((PLOTER$positive_ratings))[3]){\n mycex2[i] <- 10\n }else {\n mycex2[i] <- 20\n }\n }\n\nsummary(PLOTER$positive_ratings)\ntable(mycex_posi)"
#summary(negative_ratings_encod_cap)
# Tag-based colour category for every game ----
# One label per element of PLOTER$steamspy_tags, derived from the three tag
# columns of fifth_encod. When several rules match, the highest priority wins:
#   1. "Action1": first tag is "Action"
#   2. "Indie2" : second or third tag is "Indie"
#   3. "Action2": second or third tag is "Action"
#   4. "Indie1" : first tag is "Indie"
#   5. "none"   : anything else
# Vectorised replacement for the former `for (i in 1:length(...))` loop: it
# handles zero rows (where `1:0` would iterate over c(1, 0)), does not grow the
# vector element by element, and treats missing tags as "no match" instead of
# failing inside `if`.
colors <- local({
  n_games <- length(PLOTER$steamspy_tags)
  rows <- seq_len(n_games)
  tag1 <- as.character(fifth_encod$steamspy_tags1)[rows]
  tag2 <- as.character(fifth_encod$steamspy_tags2)[rows]
  tag3 <- as.character(fifth_encod$steamspy_tags3)[rows]

  labels <- rep("none", n_games)
  # Assign from lowest to highest priority so later rules overwrite earlier
  # ones; `%in%` never yields NA, so missing tags simply do not match.
  labels[tag1 %in% "Indie"] <- "Indie1"
  labels[tag2 %in% "Action" | tag3 %in% "Action"] <- "Action2"
  labels[tag2 %in% "Indie" | tag3 %in% "Indie"] <- "Indie2"
  labels[tag1 %in% "Action"] <- "Action1"
  labels
})

# Share of games falling into each colour category.
table(colors) / length(colors)
## colors
## Action1 Action2 Indie1 Indie2 none
## 0.11642030 0.10622207 0.05568924 0.17875559 0.54291280
#colors()
# Interactive 3-D scatter of the coordinates x, y, z (presumably the MDS
# embedding -- confirm they are not reassigned in between): hover text is the
# game name, colour follows the tag category in `colors`, and marker diameter
# follows the number of positive ratings.
# NOTE(review): the palette lists four colours while `colors` holds five
# categories in the tabulated output -- confirm how plotly fills the gap.
ply <- plot_ly(
  x = x,
  y = y,
  z = z,
  type = "scatter3d",
  text = PLOTER$name,
  marker = list(symbol = "circle", sizemode = "diameter"),
  color = colors,
  colors = c("#191970", "#9400D3", "#E68AB8", "white"),
  size = PLOTER$positive_ratings,
  sizes = c(2, 50),
  width = NULL
)
#row.names(fifth_encod)
#c('green','black','white')
#c("#191970",'#9400D3',"#E68AB8","white")
# Opens the help page for plot_ly (interactive leftover).
?plot_ly
# Marker-size variable: positive ratings raised to the power 1.5.
# Despite the name, no logarithm is taken.
rating_log <- PLOTER$positive_ratings^1.5
# 3-D scatter of the coordinates x1, y1, z1 (defined outside this section;
# presumably the t-SNE embedding -- confirm). Points are labelled with the row
# names of fifth_encod, coloured by the tag category in `colors`, and sized by
# rating_log.
plyt <- plot_ly(
x = x1,
y = y1,
z = z1,
type = "scatter3d",
text = row.names(fifth_encod),
# NOTE(review): confirm 'arrow-down-open' is a marker symbol that scatter3d
# traces support.
marker = list(symbol = 'arrow-down-open', sizemode = 'diameter'),
color = colors,
# NOTE(review): colors[1] .. colors[4] are the category labels of the first
# four games (e.g. "Action1"), not colour values -- verify that this
# colorscale has any effect.
colorscale = list(
c(0, colors[1]),
c(0.25, colors[2]),
c(0.75, colors[3]),
c(1, colors[4])
),
autocolorscale = FALSE,
size = rating_log,
sizes = c(2, 55),
width = NULL,
cauto = TRUE
)
#c('green','black','white')
#c("#191970",'#9400D3',"#E68AB8","white")
# plot_ly Results ----
# MDS ----
# Display the plot stored in `ply` (a top-level expression is auto-printed).
ply
#,name=row.names(train_datasetG_fifth)
#sum(fitall343d$eig[1:2])/sum(fitall343d$eig)
#sum(fitall343d$eig[1:3])/sum(fitall343d$eig)
# t-SNE ----
# "Starry sky" view centred on one reference game ----
# Re-centres the coordinates x1, y1, z1 (defined outside this section;
# presumably the t-SNE embedding -- confirm) on a chosen game, orders all games
# by their distance from it, and plots them in 3-D.

# Row index of the reference game "Yakuza: Like a Dragon".
game_idx <- which(row.names(fifth_encod) == "Yakuza: Like a Dragon")
# Fail early: with no match (or several) the subtractions below would silently
# produce empty or recycled coordinates.
stopifnot(length(game_idx) == 1L)

# Use the reference game's coordinates as the origin.
x0 <- x1[game_idx]
y0 <- y1[game_idx]
z0 <- z1[game_idx]

# Position of every point relative to the reference game.
x_relative <- x1 - x0
y_relative <- y1 - y0
z_relative <- z1 - z0

# Scale each axis by its largest absolute offset so values lie in [-1, 1].
# Dividing by max(offset), as before, does not bound negative offsets and
# divides by zero when the reference game sits at the maximum of an axis.
x_normalized <- x_relative / max(abs(x_relative))
y_normalized <- y_relative / max(abs(y_relative))
z_normalized <- z_relative / max(abs(z_relative))

# Distance of every point from the origin, and the nearest-first ordering.
dist <- sqrt(x_normalized^2 + y_normalized^2 + z_normalized^2)
idx <- order(dist)

# Reorder the coordinates and the per-game attributes by that ordering.
x_sorted <- x_normalized[idx]
y_sorted <- y_normalized[idx]
z_sorted <- z_normalized[idx]
color_sorted <- colors[idx]
rating_sorted <- PLOTER$positive_ratings[idx]

# Relabel the farthest 20% of points as "white" to mimic distant stars.
# tail(seq_len(n), k) stays empty when k is 0, whereas (n - k + 1):n would
# count backwards and index past the end of the vector.
# NOTE(review): plot_ly may treat character `color` values as category labels
# rather than literal colours -- confirm these points really render white.
num_stars <- length(dist)
num_sky <- round(num_stars * 0.2)
color_sorted[tail(seq_len(num_stars), num_sky)] <- "white"

# Ratings mapped linearly onto the range 1-10.
# NOTE(review): the plot below sizes markers by rating_sorted, not by this
# rescaled value -- confirm which one is intended.
rating_scaled <- scale(
  rating_sorted,
  center = min(rating_sorted),
  scale = max(rating_sorted) - min(rating_sorted)
) * 9 + 1

# Draw the star map. The hover text is reordered with `idx` like every other
# per-point vector, so each label stays attached to its own game.
plot_ly(
  x = x_sorted,
  y = y_sorted,
  z = z_sorted,
  type = "scatter3d",
  text = row.names(fifth_encod)[idx],
  marker = list(symbol = "circle", sizemode = "diameter"),
  color = color_sorted,
  size = rating_sorted,
  sizes = c(3, 35)
)
#size=(train_datasetG_fifth$positive_ratings_encod_cap)
# Display the plot stored in `plyt` (a top-level expression is auto-printed).
plyt
#,name=row.names(train_datasetG_fifth)
#one_hot(as.data.table(fifth_encod[,2:4]))
library(Matrix)
#' Pairwise (optionally smoothed) Jaccard similarity between matrix rows
#'
#' For rows i and j with `common = sum(m[i, ] * m[j, ])` and row totals
#' `b = rowSums(m)`, the similarity is
#' `(common + c) / (b[i] + b[j] - common + c)`.
#' Only pairs with `common > 0` are stored; every other pair is an implicit
#' zero of the sparse result, so the smoothing constant is not applied to
#' pairs that share nothing.
#'
#' @param m A numeric matrix with one item per row. The formula reads it as a
#'   0/1 indicator matrix (presumably one-hot encoded -- confirm with callers).
#' @param c Smoothing constant added to both numerator and denominator.
#'   Defaults to 0, which gives the plain Jaccard index.
#' @return A sparse `nrow(m)` x `nrow(m)` matrix of similarities, built with
#'   `sparseMatrix()`.
jaccard <- function(m, c = 0) {
  stopifnot(is.numeric(c), length(c) == 1L)

  # Number of shared non-zero entries for every pair of rows.
  common <- tcrossprod(m)
  # (row, col) positions of the pairs that share at least one entry.
  hits <- which(common > 0, arr.ind = TRUE)
  # Count of entries per row.
  row_totals <- rowSums(m)
  # Shared counts restricted to those pairs.
  shared <- common[hits]

  # Jaccard formula: #common / (#i + #j - #common), with `c` added to both
  # the numerator and the denominator.
  sparseMatrix(
    i = hits[, 1],
    j = hits[, 2],
    x = (shared + c) /
      (row_totals[hits[, 1]] + row_totals[hits[, 2]] - shared + c),
    dims = dim(common)
  )
}
# Cosine distance between the rows of NORM (defined outside this section) ----
Matrix <- as.matrix(NORM)
# Scale every row to unit Euclidean length.
sim <- sweep(Matrix, 1, sqrt(rowSums(Matrix^2)), "/")
# Dot products of the unit rows are the pairwise cosine similarities.
sim <- sim %*% t(sim)
# Turn similarity into a distance object.
D_sim <- as.dist(1 - sim)

# Disabled scratch code: the block below is wrapped in a string literal, so
# evaluating it only yields (and, at top level, prints) the text itself.
# NOTE(review): the quoted calls pass two vectors to jaccard() and also use
# `Jaccard` with a capital J; neither matches the jaccard(m, c) function
# defined in this file.
"NORM <- one_hot(as.data.table(fifth_encod[,2:4]))
tNORM <- t(NORM)
# NORM[1,],NORM[2,]
# tNORM[,1],tNORM[,2]
trydist <- matrix(rep(0,12881^2), nrow = nrow(NORM), ncol = nrow(NORM))
for (i in 1:10) {
for(j in (i+1):10){
trydist[i,j] <- 1-jaccard(tNORM[,i],tNORM[,j],0.01)
trydist[j,i] <- trydist[i,j]
}
}
jaccard(tNORM[,1],tNORM[,10])
Jaccard(tNORM[,1],tNORM[,20],0.01)
(1-jaccard(t(tNORM),0))[1:10]"
## [1] "NORM <- one_hot(as.data.table(fifth_encod[,2:4]))\n\n\ntNORM <- t(NORM)\n\n\n# NORM[1,],NORM[2,]\n# tNORM[,1],tNORM[,2]\n\ntrydist <- matrix(rep(0,12881^2), nrow = nrow(NORM), ncol = nrow(NORM))\nfor (i in 1:10) {\n \n \n for(j in (i+1):10){\n \n trydist[i,j] <- 1-jaccard(tNORM[,i],tNORM[,j],0.01)\n trydist[j,i] <- trydist[i,j] \n }\n}\n\n\njaccard(tNORM[,1],tNORM[,10])\nJaccard(tNORM[,1],tNORM[,20],0.01)\n\n(1-jaccard(t(tNORM),0))[1:10]"
#(1-jaccard(as.matrix(one_hot(as.data.table(fifth_encod[,1]))),0))[1:30]
#eng_jaccardo[1:30]
#plat_jaccardo[1:30]
#1-jaccard(as.matrix(one_hot(as.data.table(fifth_encod[,2:4]))),0)[-1][1:30]
#sum(plat_jaccardo[1:3000]-(1-jaccard(as.matrix(one_hot(as.data.table(fifth_encod[,2:4]))),0)[-1][1:3000]))
#(1-jaccard(as.matrix(one_hot(as.data.table(fifth_encod[,2:4]))),0)[-1])
#jaccard(as.matrix(one_hot(as.data.table(fifth_encod[,5]))),5)[1:10]